library(tidyverse)
## -- Attaching packages ------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts --------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.6.3
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Load the track-level table and the per-artist table.
# The CSVs contain non-ASCII artist names; declaring the encoding keeps them
# from being shown as mojibake. stringsAsFactors is set explicitly so the
# column types do not depend on the R version (its default changed in R 4.0).
data <- read.csv(
  "C:/Data/spotify/data.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)
data_w_genres <- read.csv(
  "C:/Data/spotify/data_w_genres.csv",
  encoding = "UTF-8",
  stringsAsFactors = FALSE
)
head(data)
## acousticness artists danceability duration_ms
## 1 0.991000 ['Mamie Smith'] 0.598 168333
## 2 0.643000 ["Screamin' Jay Hawkins"] 0.852 150200
## 3 0.993000 ['Mamie Smith'] 0.647 163827
## 4 0.000173 ['Oscar Velazquez'] 0.730 422087
## 5 0.295000 ['Mixe'] 0.704 165224
## 6 0.996000 ['Mamie Smith & Her Jazz Hounds'] 0.424 198627
## energy explicit id instrumentalness key liveness loudness
## 1 0.224 0 0cS0A1fUEUd1EW3FcF8AEI 5.22e-04 5 0.3790 -12.628
## 2 0.517 0 0hbkKFIJm7Z05H8Zl9w30f 2.64e-02 5 0.0809 -7.261
## 3 0.186 0 11m7laMUgmOKqI3oYzuhne 1.76e-05 0 0.5190 -12.098
## 4 0.798 0 19Lc5SfJJ5O1oaxY0fpwfh 8.01e-01 2 0.1280 -7.311
## 5 0.707 1 2hJjbsLCytGsnAHfdsLejp 2.46e-04 10 0.4020 -6.036
## 6 0.245 0 3HnrHGLE9u2MjHtdobfWl9 7.99e-01 5 0.2350 -11.470
## mode name popularity
## 1 0 Keep A Song In Your Soul 12
## 2 0 I Put A Spell On You 7
## 3 1 Golfing Papa 4
## 4 1 True House Music - Xavier Santos & Carlos Gomix Remix 17
## 5 0 Xuniverxe 2
## 6 1 Crazy Blues - 78rpm Version 9
## release_date speechiness tempo valence year
## 1 1920 0.0936 149.976 0.6340 1920
## 2 1920-01-05 0.0534 86.889 0.9500 1920
## 3 1920 0.1740 97.600 0.6890 1920
## 4 1920-01-01 0.0425 127.997 0.0422 1920
## 5 1920-10-01 0.0768 122.076 0.2990 1920
## 6 1920 0.0397 103.870 0.4770 1920
# Preview the first rows of the per-artist table.
head(data_w_genres, n = 6L)
## artists
## 1 "Cats" 1981 Original London Cast
## 2 "Cats" 1983 Broadway Cast
## 3 "Fiddler On The Roofâ\200\235 Motion Picture Chorus
## 4 "Fiddler On The Roofâ\200\235 Motion Picture Orchestra
## 5 "Joseph And The Amazing Technicolor Dreamcoat" 1991 London Cast
## 6 "Joseph And The Amazing Technicolor Dreamcoat" 1992 Canadian Cast
## acousticness danceability duration_ms energy instrumentalness liveness
## 1 0.5985000 0.4701000 267072.0 0.3762030 0.010260876 0.2830500
## 2 0.8625385 0.4417308 287280.0 0.4068077 0.081158264 0.3152154
## 3 0.8565714 0.3482857 328920.0 0.2865714 0.024592949 0.3257857
## 4 0.8849259 0.4250741 262891.0 0.2457704 0.073587279 0.2754815
## 5 0.5107143 0.4671429 270436.1 0.4882857 0.009400291 0.1950000
## 6 0.5911667 0.4843333 218504.5 0.3006083 0.007042273 0.1760667
## loudness speechiness tempo valence popularity key mode count
## 1 -14.43430 0.20915000 114.12880 0.3583200 38.20000 5 1 10
## 2 -10.69000 0.17621154 103.04415 0.2688654 31.53846 5 1 26
## 3 -15.23071 0.11851429 77.37586 0.3548571 34.57143 0 1 7
## 4 -15.63937 0.12320000 88.66763 0.3720296 34.40741 0 1 27
## 5 -10.23671 0.09854286 122.83586 0.4822857 42.00000 5 1 7
## 6 -18.57950 0.10495833 122.51783 0.4245000 33.16667 9 1 24
## genres
## 1 ['show tunes']
## 2 []
## 3 []
## 4 []
## 5 []
## 6 []
# Lookup from key number (as a string, "0".."11") to note name.
# The vector is reversed so that the two-digit keys "11" and "10" come before
# the one-digit keys when it is applied as a sequence of replacements.
key_map <- rev(setNames(
  c("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"),
  as.character(0:11)
))
# Build the cleaned track-level table used by the analyses below.
# NOTE: despite the name, the only year filter is `year <= 2020`; the name is
# kept because later code refers to it.
data_2010_2020 <- data %>%
  mutate(id = as.character(id)) %>%
  #select(-id, -release_date) %>% # duplicate records for different ids and release date
  filter(year <= 2020) %>%
  distinct() %>%
  # the same song by the same artist has multiple records with slight changes
  # in audio features and year; keep the record with the smallest id
  group_by(name, artists) %>%
  filter(id == min(id)) %>%
  #filter(popularity == max(popularity)) %>%
  ungroup() %>%
  mutate(
    popularity_category = ifelse(popularity >= 80, "80+", "<80"),
    # include.lowest = TRUE closes the last bin ([0.9, 1]) so that tracks with
    # valence exactly 1 are binned instead of becoming NA
    valence_bin = cut(
      valence,
      seq(0, 1, 0.1),
      right = FALSE,
      include.lowest = TRUE
    ),
    duration_min = duration_ms / (1000 * 60),
    mode_type = case_when(
      mode == 0 ~ "minor",
      mode == 1 ~ "major"
    ),
    key_str = as.character(key),
    # direct lookup by name; does not depend on the order of key_map
    key_group = unname(key_map[key_str])
  )
summary(data_2010_2020)
## acousticness artists danceability
## Min. :0.0000 ['Tadeusz Dolega Mostowicz'] : 1281 Min. :0.000
## 1st Qu.:0.0926 ['ÐÑ\200неÑ\201Ñ‚ ХемингуÑ\215й'] : 1175 1st Qu.:0.413
## Median :0.5310 ['ÐÑ\200их МаÑ\200иÑ\217 РемаÑ\200к']: 1062 Median :0.546
## Mean :0.5045 ['Francisco Canaro'] : 918 Mean :0.535
## 3rd Qu.:0.8970 ['Frank Sinatra'] : 586 3rd Qu.:0.667
## Max. :0.9960 ['Ignacio Corsini'] : 555 Max. :0.988
## (Other) :153004
## duration_ms energy explicit id
## Min. : 4937 Min. :0.0000 Min. :0.0000 Length:158581
## 1st Qu.: 165440 1st Qu.:0.2460 1st Qu.:0.0000 Class :character
## Median : 205027 Median :0.4590 Median :0.0000 Mode :character
## Mean : 232021 Mean :0.4792 Mean :0.0701
## 3rd Qu.: 265000 3rd Qu.:0.7050 3rd Qu.:0.0000
## Max. :5338302 Max. :1.0000 Max. :1.0000
##
## instrumentalness key liveness loudness
## Min. :0.000000 Min. : 0.000 Min. :0.0000 Min. :-60.000
## 1st Qu.:0.000000 1st Qu.: 2.000 1st Qu.:0.0998 1st Qu.:-14.998
## Median :0.000505 Median : 5.000 Median :0.1390 Median :-10.916
## Mean :0.194571 Mean : 5.204 Mean :0.2129 Mean :-11.824
## 3rd Qu.:0.237000 3rd Qu.: 8.000 3rd Qu.:0.2730 3rd Qu.: -7.572
## Max. :1.000000 Max. :11.000 Max. :1.0000 Max. : 3.855
##
## mode name popularity release_date
## Min. :0.000 White Christmas : 90 Min. : 0.00 1945 : 1272
## 1st Qu.:0.000 Winter Wonderland: 77 1st Qu.: 1.00 1935 : 1081
## Median :1.000 Silent Night : 74 Median :26.00 1949 : 1070
## Mean :0.703 Jingle Bells : 59 Mean :25.75 1926 : 1010
## 3rd Qu.:1.000 2000 Years : 56 3rd Qu.:42.00 1950 : 968
## Max. :1.000 Sleigh Ride : 46 Max. :96.00 1948 : 942
## (Other) :158179 (Other):152238
## speechiness tempo valence year
## Min. :0.0000 Min. : 0.00 Min. :0.000 Min. :1920
## 1st Qu.:0.0352 1st Qu.: 93.65 1st Qu.:0.312 1st Qu.:1955
## Median :0.0456 Median :115.64 Median :0.536 Median :1976
## Mean :0.1092 Mean :116.88 Mean :0.525 Mean :1976
## 3rd Qu.:0.0773 3rd Qu.:135.00 3rd Qu.:0.743 3rd Qu.:1998
## Max. :0.9710 Max. :243.51 Max. :1.000 Max. :2020
##
## popularity_category valence_bin duration_min mode_type
## Length:158581 [0.5,0.6):19648 Min. : 0.08228 Length:158581
## Class :character [0.6,0.7):19000 1st Qu.: 2.75733 Class :character
## Mode :character [0.7,0.8):17914 Median : 3.41712 Mode :character
## [0.4,0.5):17264 Mean : 3.86701
## [0.3,0.4):17071 3rd Qu.: 4.41667
## (Other) :67680 Max. :88.97170
## NA's : 4
## key_str key_group
## Length:158581 Length:158581
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Number of tracks per year, drawn as a line with points.
data_2010_2020 %>%
  group_by(year) %>%
  summarise(n = n()) %>%
  ggplot(mapping = aes(x = year, y = n, group = 1)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(from = 1910, to = 2020, by = 10)) +
  theme(axis.text.x = element_text(angle = 90))
# Mean and maximum popularity per year.
# Colour is mapped inside aes() and resolved by a manual scale so the plot
# gets a legend identifying each line (same blue/green as before).
data_2010_2020 %>%
  group_by(year) %>%
  summarise(
    mean_popularity = mean(popularity),
    max_popularity = max(popularity)
  ) %>%
  ggplot(aes(x = year, group = 1)) +
  geom_line(aes(y = mean_popularity, color = "mean")) +
  geom_line(aes(y = max_popularity, color = "max")) +
  scale_color_manual(
    name = "popularity",
    values = c("mean" = "blue", "max" = "green")
  ) +
  scale_x_continuous(breaks = seq(1910, 2020, 10)) +
  labs(y = "popularity") +
  theme(axis.text.x = element_text(angle = 90))
# Columns compared for the most popular track(s).
audio_features <- c(
  "acousticness", "danceability", "duration_min", "energy",
  "instrumentalness", "explicit", "liveness", "loudness", "key", "mode",
  "speechiness", "tempo", "valence"
)
# One facet per feature for the track(s) with the highest popularity.
# all_of() marks `audio_features` as an external character vector of column
# names, which removes the tidyselect ambiguity note.
data_2010_2020 %>%
  filter(popularity == max(popularity)) %>%
  pivot_longer(
    cols = all_of(audio_features),
    names_to = "feature_name",
    values_to = "feature_value"
  ) %>%
  ggplot(aes(x = name, y = feature_value, fill = name)) +
  geom_col() +
  facet_wrap(~feature_name, scales = "free_y")
# Min-max rescale a numeric vector to [0, 1].
#
# x      numeric vector.
# na.rm  if TRUE, NAs are ignored when computing the range and stay NA in the
#        result; the default FALSE keeps the previous behaviour (any NA makes
#        the whole result NA).
#
# Returns a vector the same length as x. A vector whose values are all equal
# maps to 0 instead of NaN (0 / 0), and an empty vector is returned unchanged.
rescale <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(x)
  }
  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]
  if (!is.na(span) && span == 0) {
    # constant input: avoid dividing by zero
    return(x - rng[1])
  }
  (x - rng[1]) / span
}
# Copy of the track table with every numeric column rescaled by rescale().
# `year` is turned into a character column first so that mutate_if() skips it,
# then converted back to integer afterwards.
scales_data_2010_2020 <- data_2010_2020 %>%
  mutate(year = as.character(year)) %>%
  mutate_if(is.numeric, function(column) rescale(column)) %>%
  mutate(year = as.integer(year))
# Continuous audio features used for the trend, correlation and artist plots.
audio_features_2 <- c(
  "acousticness", "danceability", "energy", "instrumentalness", "liveness",
  "loudness", "speechiness", "tempo", "valence"
)
# Yearly mean of each rescaled feature, one coloured line per feature.
# all_of() marks `audio_features_2` as an external character vector of column
# names, which removes the tidyselect ambiguity note.
scales_data_2010_2020 %>%
  pivot_longer(
    cols = all_of(audio_features_2),
    names_to = "feature_name",
    values_to = "feature_value"
  ) %>%
  group_by(year, feature_name) %>%
  summarise(mean_feature_value = mean(feature_value)) %>%
  ungroup() %>%
  ggplot(aes(x = year, y = mean_feature_value, color = feature_name)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1910, 2020, 10)) +
  scale_color_brewer(palette = "Set3")
# Stacked yearly track counts, filled by mode type (major / minor).
data_2010_2020 %>%
  group_by(year, mode_type) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = year, y = n, fill = mode_type, group = 1)) +
  geom_col() +
  scale_x_continuous(breaks = seq(from = 1910, to = 2020, by = 10))
# Stacked yearly track counts, filled by the explicit flag.
data_2010_2020 %>%
  group_by(year, explicit) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = year, y = n, fill = factor(explicit))) +
  geom_col() +
  scale_x_continuous(breaks = seq(from = 1910, to = 2020, by = 10))
# Share of each key within every year, as a stacked column chart.
data_2010_2020 %>%
  count(year, key_group) %>%
  group_by(year) %>%
  # prop.table(n) is n divided by the group's total
  mutate(perc = prop.table(n)) %>%
  ggplot(mapping = aes(x = year, y = perc, fill = key_group)) +
  geom_col() +
  scale_fill_brewer(palette = "Paired") +
  scale_x_continuous(breaks = seq(from = 1910, to = 2020, by = 10))
# Mean track duration (minutes) per year.
# summarise() collapses to one row per year; the previous mutate() kept one
# row per track, so every yearly mean was drawn once for each track in it.
data_2010_2020 %>%
  group_by(year) %>%
  summarise(mean_dur = mean(duration_min)) %>%
  ungroup() %>%
  ggplot(aes(x = year, y = mean_dur)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(1910, 2020, 10))
# Track(s) whose danceability equals the maximum in the table.
data_2010_2020 %>%
  slice(which(danceability == max(danceability)))
## # A tibble: 1 x 25
## acousticness artists danceability duration_ms energy explicit id
## <dbl> <fct> <dbl> <int> <dbl> <int> <chr>
## 1 0.0755 ['Tone~ 0.988 248160 0.633 0 5YIF~
## # ... with 18 more variables: instrumentalness <dbl>, key <int>,
## # liveness <dbl>, loudness <dbl>, mode <int>, name <fct>, popularity <int>,
## # release_date <fct>, speechiness <dbl>, tempo <dbl>, valence <dbl>,
## # year <int>, popularity_category <chr>, valence_bin <fct>,
## # duration_min <dbl>, mode_type <chr>, key_str <chr>, key_group <chr>
# Track(s) whose tempo equals the maximum in the table.
data_2010_2020 %>%
  slice(which(tempo == max(tempo)))
## # A tibble: 1 x 25
## acousticness artists danceability duration_ms energy explicit id
## <dbl> <fct> <dbl> <int> <dbl> <int> <chr>
## 1 0.497 ['Bill~ 0.535 277221 0.38 0 56n7~
## # ... with 18 more variables: instrumentalness <dbl>, key <int>,
## # liveness <dbl>, loudness <dbl>, mode <int>, name <fct>, popularity <int>,
## # release_date <fct>, speechiness <dbl>, tempo <dbl>, valence <dbl>,
## # year <int>, popularity_category <chr>, valence_bin <fct>,
## # duration_min <dbl>, mode_type <chr>, key_str <chr>, key_group <chr>
# Interactive heatmap of pairwise correlations between the audio features and
# popularity. all_of() marks the character vector as external column names,
# which avoids the tidyselect ambiguity note.
ggplotly(
  data_2010_2020 %>%
    select(all_of(c(audio_features_2, "popularity"))) %>%
    cor() %>%
    # melt() turns the correlation matrix into long form: Var1, Var2, value
    melt() %>%
    ggplot(aes(x = Var1, y = Var2, fill = value)) +
    geom_tile() +
    scale_fill_gradient2(low = "blue", high = "orange", mid = "purple") +
    theme(axis.text.x = element_text(angle = 90))
)
# Mean popularity against mean acousticness within narrow acousticness bins
# (width 0.0001), with a linear fit over the bin means.
data_2010_2020 %>%
  mutate(
    acoustic_bin = cut(acousticness, seq(0, 1.1, by = 0.0001), right = FALSE)
  ) %>%
  group_by(acoustic_bin) %>%
  summarise(
    mean_popularity = mean(popularity),
    mean_acoustic = mean(acousticness)
  ) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = mean_acoustic, y = mean_popularity)) +
  geom_point(alpha = 0.2, size = 3) +
  geom_smooth(method = "lm")
# alternative point styling:
#geom_point(alpha = 0.8, size = 3, color = "red", pch = 21, fill = "black")
# Mean popularity against mean energy within narrow energy bins
# (width 0.0001), with a linear fit over the bin means.
data_2010_2020 %>%
  mutate(
    energy_bin = cut(energy, seq(0, 1.1, by = 0.0001), right = FALSE)
  ) %>%
  group_by(energy_bin) %>%
  summarise(
    mean_popularity = mean(popularity),
    mean_energy = mean(energy)
  ) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = mean_energy, y = mean_popularity)) +
  geom_point(alpha = 0.2, size = 3) +
  geom_smooth(method = "lm")
# alternative point styling:
#geom_point(alpha = 0.8, size = 3, color = "red", pch = 21, fill = "black")
# Smallest energy value in the table.
min(pull(data_2010_2020, energy))
## [1] 0
# Mean popularity against mean loudness within 0.01-wide loudness bins, with a
# linear fit over the bin means.
# The breaks are derived from the observed range. The previous fixed breaks
# covered only [-60, 0), so tracks with loudness >= 0 fell into an NA bin.
data_2010_2020 %>%
  mutate(loudness_bin = cut(
    loudness,
    seq(floor(min(loudness)), ceiling(max(loudness)), by = 0.01),
    right = FALSE,
    # closes the last bin so a maximum that sits exactly on the top break
    # is still binned
    include.lowest = TRUE
  )) %>%
  group_by(loudness_bin) %>%
  summarise(
    mean_popularity = mean(popularity),
    mean_loudness = mean(loudness)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = mean_loudness, y = mean_popularity)) +
  geom_point(alpha = 0.2, size = 3) +
  geom_smooth(method = "lm")
#geom_point(alpha = 0.8, size = 3, color = "red", pch = 21, fill = "black")
# Smallest loudness value in the table.
min(pull(data_2010_2020, loudness))
## [1] -60
# Mean popularity against mean instrumentalness within 0.0001-wide bins, with
# a linear fit over the bin means.
# The breaks run past 1 (as in the acousticness and energy plots): with
# right = FALSE a last break at exactly 1 leaves instrumentalness == 1
# outside every bin, producing an NA group.
data_2010_2020 %>%
  mutate(
    instr_bin = cut(instrumentalness, seq(0, 1.1, 0.0001), right = FALSE)
  ) %>%
  group_by(instr_bin) %>%
  summarise(
    mean_popularity = mean(popularity),
    mean_instr = mean(instrumentalness)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = mean_instr, y = mean_popularity)) +
  geom_point(alpha = 0.2, size = 3) +
  geom_smooth(method = "lm")
#geom_point(alpha = 0.8, size = 3, color = "red", pch = 21, fill = "black")
# Smallest instrumentalness value in the table.
# NOTE(review): this previously repeated the loudness query from the section
# above, which looked like a copy-paste slip after the instrumentalness plot;
# confirm that instrumentalness is the intended column.
data_2010_2020 %>%
  pull(instrumentalness) %>%
  min()
## [1] 0
# Mean valence against mean danceability within narrow danceability bins
# (width 0.0001), with a linear fit over the bin means.
data_2010_2020 %>%
  mutate(
    dance_bin = cut(danceability, seq(0, 1, by = 0.0001), right = FALSE)
  ) %>%
  group_by(dance_bin) %>%
  summarise(
    mean_valence = mean(valence),
    mean_dance = mean(danceability)
  ) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = mean_dance, y = mean_valence)) +
  geom_point(alpha = 0.2, size = 3) +
  geom_smooth(method = "lm")
# alternative point styling:
#geom_point(alpha = 0.8, size = 3, color = "red", pch = 21, fill = "black")
# The 20 artist entries with the most tracks, as horizontal bars filled by
# the span (in years) between their first and last track year.
data_2010_2020 %>%
  group_by(artists) %>%
  summarise(
    n_songs = n(),
    first_activity = min(year),
    last_activity = max(year)
  ) %>%
  ungroup() %>%
  mutate(years_active = last_activity - first_activity + 1) %>%
  arrange(desc(n_songs)) %>%
  head(n = 20) %>%
  ggplot(
    mapping = aes(
      x = reorder(artists, n_songs),
      y = n_songs,
      fill = years_active
    )
  ) +
  geom_col() +
  coord_flip()
# Broad genre labels, in matching priority order (earlier wins).
genre_group <- c("pop","indie","rock","metal","rap","jazz", "classical")

# Map raw `genres` strings to one broad label each.
#
# genres  vector of genre strings (one element per artist).
# labels  labels to look for, highest priority first; defaults to
#         `genre_group` followed by "tango".
#
# Returns a character vector the same length as `genres`: the first label in
# `labels` found in the string, or "other" when none is found. Matching is by
# substring (grepl), so e.g. "rap" also matches a genre containing "trap".
classify_genre <- function(genres, labels = c(genre_group, "tango")) {
  result <- rep("other", length(genres))
  # Go from lowest to highest priority so that a higher-priority label
  # overwrites a lower one, reproducing first-match-wins semantics.
  for (label in rev(labels)) {
    result[grepl(label, genres)] <- label
  }
  result
}

# 30 artists with the highest `count`, bars show count.
data_w_genres %>%
  mutate(genre_group = classify_genre(genres)) %>%
  arrange(desc(count)) %>%
  head(30) %>%
  ggplot(aes(x = reorder(artists, count), y = count, fill = genre_group)) +
  geom_col() +
  coord_flip()

# Same 30 artists (still ordered by count), bars show popularity.
data_w_genres %>%
  mutate(genre_group = classify_genre(genres)) %>%
  arrange(desc(count)) %>%
  head(30) %>%
  ggplot(aes(x = reorder(artists, count), y = popularity, fill = genre_group)) +
  geom_col() +
  coord_flip()

# 30 artists with the highest popularity, bars show popularity.
data_w_genres %>%
  mutate(genre_group = classify_genre(genres)) %>%
  arrange(desc(popularity)) %>%
  head(30) %>%
  ggplot(aes(x = reorder(artists, popularity), y = popularity, fill = genre_group)) +
  geom_col() +
  coord_flip()
# Scatter of per-artist popularity against `count`, plotted unbinned.
ggplot(data = data_w_genres, mapping = aes(x = count, y = popularity)) +
  geom_point(alpha = 0.2, size = 3)
# Pearson correlation between per-artist `count` and popularity.
with(data_w_genres, cor(count, popularity))
## [1] -0.000227884
# Tracks credited solely to Lata Mangeshkar, most popular first.
arrange(
  filter(data_2010_2020, artists == "['Lata Mangeshkar']"),
  desc(popularity)
)
## # A tibble: 323 x 25
## acousticness artists danceability duration_ms energy explicit id
## <dbl> <fct> <dbl> <int> <dbl> <int> <chr>
## 1 0.983 ['Lata~ 0.433 238267 0.497 0 2DG0~
## 2 0.904 ['Lata~ 0.447 353653 0.427 0 0mMs~
## 3 0.788 ['Lata~ 0.389 354427 0.372 0 5M7L~
## 4 0.308 ['Lata~ 0.611 220107 0.293 0 5nqD~
## 5 0.875 ['Lata~ 0.566 315613 0.412 0 1O5q~
## 6 0.988 ['Lata~ 0.731 188560 0.314 0 4CHM~
## 7 0.907 ['Lata~ 0.435 509466 0.449 0 167v~
## 8 0.988 ['Lata~ 0.425 286297 0.449 0 5y1l~
## 9 0.974 ['Lata~ 0.551 242507 0.323 0 2QT3~
## 10 0.962 ['Lata~ 0.721 153453 0.293 0 5PTS~
## # ... with 313 more rows, and 18 more variables: instrumentalness <dbl>,
## # key <int>, liveness <dbl>, loudness <dbl>, mode <int>, name <fct>,
## # popularity <int>, release_date <fct>, speechiness <dbl>, tempo <dbl>,
## # valence <dbl>, year <int>, popularity_category <chr>, valence_bin <fct>,
## # duration_min <dbl>, mode_type <chr>, key_str <chr>, key_group <chr>
# Spread of each rescaled audio feature across Lata Mangeshkar's tracks.
# all_of() marks `audio_features_2` as an external character vector of column
# names, which avoids the tidyselect ambiguity note.
scales_data_2010_2020 %>%
  filter(artists == "['Lata Mangeshkar']") %>%
  pivot_longer(
    all_of(audio_features_2),
    names_to = "feature_name",
    values_to = "feature_value"
  ) %>%
  ggplot(aes(x = feature_name, y = feature_value, color = feature_name)) +
  geom_jitter(size = 3, alpha = 0.5) +
  theme(axis.text.x = element_text(angle = 90))
# Tracks credited solely to The Beatles, most popular first.
arrange(
  filter(data_2010_2020, artists == "['The Beatles']"),
  desc(popularity)
)
## # A tibble: 327 x 25
## acousticness artists danceability duration_ms energy explicit id
## <dbl> <fct> <dbl> <int> <dbl> <int> <chr>
## 1 0.0302 ['The ~ 0.533 259947 0.376 0 2Eql~
## 2 0.879 ['The ~ 0.332 125667 0.179 0 3BQH~
## 3 0.0112 ['The ~ 0.386 425653 0.607 0 0aym~
## 4 0.754 ['The ~ 0.686 138387 0.127 0 5jgF~
## 5 0.641 ['The ~ 0.482 155227 0.849 0 5ZBe~
## 6 0.386 ['The ~ 0.49 145747 0.715 0 4pbG~
## 7 0.198 ['The ~ 0.396 182293 0.338 0 0pNe~
## 8 0.336 ['The ~ 0.39 247320 0.502 0 3Am0~
## 9 0.232 ['The ~ 0.818 188960 0.728 0 1gFN~
## 10 0.0205 ['The ~ 0.453 285000 0.654 0 389Q~
## # ... with 317 more rows, and 18 more variables: instrumentalness <dbl>,
## # key <int>, liveness <dbl>, loudness <dbl>, mode <int>, name <fct>,
## # popularity <int>, release_date <fct>, speechiness <dbl>, tempo <dbl>,
## # valence <dbl>, year <int>, popularity_category <chr>, valence_bin <fct>,
## # duration_min <dbl>, mode_type <chr>, key_str <chr>, key_group <chr>
# Spread of each rescaled audio feature across The Beatles' tracks.
# all_of() marks `audio_features_2` as an external character vector of column
# names, which avoids the tidyselect ambiguity note.
scales_data_2010_2020 %>%
  filter(artists == "['The Beatles']") %>%
  pivot_longer(
    all_of(audio_features_2),
    names_to = "feature_name",
    values_to = "feature_value"
  ) %>%
  ggplot(aes(x = feature_name, y = feature_value, color = feature_name)) +
  geom_jitter(size = 3, alpha = 0.5) +
  theme(axis.text.x = element_text(angle = 90))
# Per-year mean popularity and number of tracks for The Beatles.
# Colour is mapped inside aes() and resolved by a manual scale so the plot
# gets a legend identifying each line (same blue/green as before). The unused
# total_popularity summary was dropped.
data_2010_2020 %>%
  filter(artists == "['The Beatles']") %>%
  group_by(year) %>%
  summarise(
    mean_popularity = mean(popularity),
    n_songs = n()
  ) %>%
  ungroup() %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = mean_popularity, color = "mean popularity")) +
  geom_line(aes(y = n_songs, color = "number of songs")) +
  scale_color_manual(
    name = NULL,
    values = c("mean popularity" = "blue", "number of songs" = "green")
  ) +
  labs(y = "value") +
  scale_x_continuous(breaks = seq(1910, 2020, 10))
# Tracks credited solely to Queen, most popular first.
arrange(
  filter(data_2010_2020, artists == "['Queen']"),
  desc(popularity)
)
## # A tibble: 320 x 25
## acousticness artists danceability duration_ms energy explicit id
## <dbl> <fct> <dbl> <int> <dbl> <int> <chr>
## 1 0.0472 ['Quee~ 0.563 209413 0.865 0 7hQJ~
## 2 0.112 ['Quee~ 0.933 214653 0.528 0 57JV~
## 3 0.679 ['Quee~ 0.693 122067 0.497 0 54fl~
## 4 0.714 ['Quee~ 0.599 163373 0.762 0 35It~
## 5 0.215 ['Quee~ 0.304 261627 0.42 0 3Aym~
## 6 0.0319 ['Quee~ 0.337 255600 0.684 0 4igI~
## 7 0.378 ['Quee~ 0.268 179200 0.459 0 7ccI~
## 8 0.566 ['Quee~ 0.545 173173 0.454 0 1mnQ~
## 9 0.057 ['Quee~ 0.557 209600 0.761 0 3lrN~
## 10 0.414 ['Quee~ 0.306 277827 0.686 0 3hU6~
## # ... with 310 more rows, and 18 more variables: instrumentalness <dbl>,
## # key <int>, liveness <dbl>, loudness <dbl>, mode <int>, name <fct>,
## # popularity <int>, release_date <fct>, speechiness <dbl>, tempo <dbl>,
## # valence <dbl>, year <int>, popularity_category <chr>, valence_bin <fct>,
## # duration_min <dbl>, mode_type <chr>, key_str <chr>, key_group <chr>
# Spread of each rescaled audio feature across Queen's tracks.
# all_of() marks `audio_features_2` as an external character vector of column
# names, which avoids the tidyselect ambiguity note.
scales_data_2010_2020 %>%
  filter(artists == "['Queen']") %>%
  pivot_longer(
    all_of(audio_features_2),
    names_to = "feature_name",
    values_to = "feature_value"
  ) %>%
  ggplot(aes(x = feature_name, y = feature_value, color = feature_name)) +
  geom_jitter(size = 3, alpha = 0.5) +
  theme(axis.text.x = element_text(angle = 90))
# Per-year mean popularity of Queen's tracks, as a line with points.
data_2010_2020 %>%
  filter(artists == "['Queen']") %>%
  group_by(year) %>%
  summarise(
    mean_popularity = mean(popularity),
    total_popularity = sum(popularity)
  ) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = year, y = mean_popularity)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = seq(from = 1910, to = 2020, by = 10))
# Tracks credited solely to Coldplay, most popular first.
arrange(
  filter(data_2010_2020, artists == "['Coldplay']"),
  desc(popularity)
)
## # A tibble: 49 x 25
## acousticness artists danceability duration_ms energy explicit id
## <dbl> <fct> <dbl> <int> <dbl> <int> <chr>
## 1 0.00239 ['Cold~ 0.429 266773 0.661 0 3AJw~
## 2 0.164 ['Cold~ 0.209 295533 0.417 0 7LVH~
## 3 0.0954 ['Cold~ 0.486 242373 0.617 0 1mea~
## 4 0.0509 ['Cold~ 0.449 278719 0.585 0 6nek~
## 5 0.131 ['Cold~ 0.312 242496 0.418 0 4fzy~
## 6 0.211 ['Cold~ 0.491 258267 0.693 0 3RiP~
## 7 0.00617 ['Cold~ 0.545 267867 0.675 0 0FDz~
## 8 0.599 ['Cold~ 0.577 307880 0.749 0 0BCP~
## 9 0.00205 ['Cold~ 0.638 263787 0.924 0 69ux~
## 10 0.748 ['Cold~ 0.371 227093 0.268 0 7D0R~
## # ... with 39 more rows, and 18 more variables: instrumentalness <dbl>,
## # key <int>, liveness <dbl>, loudness <dbl>, mode <int>, name <fct>,
## # popularity <int>, release_date <fct>, speechiness <dbl>, tempo <dbl>,
## # valence <dbl>, year <int>, popularity_category <chr>, valence_bin <fct>,
## # duration_min <dbl>, mode_type <chr>, key_str <chr>, key_group <chr>
# Spread of each rescaled audio feature across Coldplay's tracks.
# all_of() marks `audio_features_2` as an external character vector of column
# names, which avoids the tidyselect ambiguity note.
scales_data_2010_2020 %>%
  filter(artists == "['Coldplay']") %>%
  pivot_longer(
    all_of(audio_features_2),
    names_to = "feature_name",
    values_to = "feature_value"
  ) %>%
  ggplot(aes(x = feature_name, y = feature_value, color = feature_name)) +
  geom_jitter(size = 3, alpha = 0.5) +
  theme(axis.text.x = element_text(angle = 90))
# Per-year mean popularity of Coldplay's tracks; each point is labelled with
# its year.
data_2010_2020 %>%
  filter(artists == "['Coldplay']") %>%
  group_by(year) %>%
  summarise(
    mean_popularity = mean(popularity),
    total_popularity = sum(popularity)
  ) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = year, y = mean_popularity, label = year)) +
  geom_line() +
  geom_point() +
  geom_text() +
  scale_x_continuous(breaks = seq(from = 1910, to = 2020, by = 10))